	
cap program drop finbunchest
program define finbunchest, rclass
	args ub threshold
	
	*********************************************************************************
	*	Step 4: Collapse Data into counts of firms by revenue bins					*
	*********************************************************************************
			
					
		collapse (count) finscore, by(scoregroups)   //score is raw score; scoregroups = rounded scores
		
			*** Score polynomial terms 
		
			replace scoregroups = 0.1 if scoregroups == 0 

			gen overallscore = scoregroups
			gen overallscore2 = overallscore^2
			gen overallscore3 = overallscore^3
			gen overallscore4 = overallscore^4
	
		*** Create round number dummies: 
		
		* Create dummies for 85, 87.5, 90, 92.5, 95, 97.5, 100
		
		gen roundnumcat_1=0
		replace roundnumcat_1 = 1 if scoregroups==85
		gen roundnumcat_2=0
		replace roundnumcat_2 = 1 if scoregroups==87.4
		gen roundnumcat_3=0
		replace roundnumcat_3 = 1 if scoregroups==90
		gen roundnumcat_4=0
		replace roundnumcat_4 = 1 if scoregroups==92.4
		gen roundnumcat_5=0
		replace roundnumcat_5 = 1 if scoregroups==95
		gen roundnumcat_6=0
		replace roundnumcat_6 = 1 if scoregroups==97.4
		gen roundnumcat_7=0
		replace roundnumcat_7 = 1 if scoregroups==100

	
			
	*** Estimate Bunching: 
	
	
	mat results = (`ub',0,0,0,0,0,0,0)
	local lb1 = `threshold'-0.2

		forval i = `=`threshold'-.2' (-0.2) 64 {
		
			*di "Testing lower bound: `i'"
			
			cap drop exclregn exclregndummy 
			
			qui gen exclregn = overallscore if inrange(scoregroups,`i',`ub') // define dummies for bunching region
				qui replace exclregn = 0 if exclregn ==. 
			
			qui egen exclregndummy = group(exclregn)
			qui tab exclregndummy, matrow(regndummynames)
			local maxcats = rowsof(regndummynames) // save dummy name of the last score group  maxcats = 23
			qui levelsof exclregndummy if exclregn == `threshold', local(notch) //save dummy name of the notch group
			
			*Estimate counterfactual: 
						
			qui reg finscore i.exclregndummy overallscore overallscore2 overallscore3 overallscore4 roundnumcat_*
			
			* Identify starting and ending columns of results matrix with the coefficient dummies for excluded region
			
			mat A = e(b) // save results matrix
			
			local startcol =  colnumb(A,"2.exclregndummy")
			local notchcol = colnumb(A,"`notch'.exclregndummy")
			local afternotch = `notchcol' + 1
			local endcol =  colnumb(A,"`maxcats'.exclregndummy")   
			
*			di "Start col: `startcol', notchcol: `notchcol', endcol: `endcol'"

			* Add up the missing mass:    
			
			mkmat finscore if inrange(overallscore,`i',`=`threshold'-.1')
				mat M_actual = finscore   // M_actual is 1x1
			
			mkmat overallscore if inrange(overallscore,`i',`=`threshold'-.1')
				mat Om = overallscore   //Om is 1x1
				
			forval num = 1/7{
				mkmat roundnumcat_`num' if inrange(overallscore,`i',`=`threshold'-.1')
				mat Rm_`num' = roundnumcat_`num'
			}
				
			local Mlen = rowsof(Om)   //missing mass length of matrix
			mat C = J(1,`Mlen',1)  // C is 1x1
			
			mata : st_matrix("Om2", st_matrix("Om"):^2)
			mata : st_matrix("Om3", st_matrix("Om"):^3)
			mata : st_matrix("Om4", st_matrix("Om"):^4)

			mat M_est = _b[_cons]*C' + _b[overallscore]*Om + _b[overallscore2]*Om2 + _b[overallscore3]*Om3 + _b[overallscore4]*Om4 + _b[roundnumcat_1]*Rm_1 + _b[roundnumcat_2]*Rm_2  + _b[roundnumcat_3]*Rm_3 + _b[roundnumcat_4]*Rm_4 + _b[roundnumcat_5]*Rm_5 + _b[roundnumcat_6]*Rm_6 + _b[roundnumcat_7]*Rm_7
			
				
			mat M_missing = M_est - M_actual // mass in excess of counterfactual density  36x1
			
			mat D = M_missing'*C'  // 1x1
			
			local missingmass = D[1,1]
			 
			* Add up the excess mass:     
			
			mkmat finscore if inrange(overallscore,`=`threshold'-.1',`ub')
				mat B_actual = finscore    // B_actual is 21x1
			
			mkmat overallscore if inrange(overallscore,`=`threshold'-.1',`ub')
				mat Ob = overallscore   // Ob is 21x1
				
			forval num = 1/7{
				mkmat roundnumcat_`num' if inrange(overallscore,`=`threshold'-.1',`ub')
				mat Rb_`num' = roundnumcat_`num'
			}
						
			local Blen = rowsof(Ob)   //missing mass length of matrix
			mat C = J(1,`Blen',1)    
				
			mata : st_matrix("Ob2", st_matrix("Ob"):^2)
			mata : st_matrix("Ob3", st_matrix("Ob"):^3)
			mata : st_matrix("Ob4", st_matrix("Ob"):^4)

			mat B_est = _b[_cons]*C' + _b[overallscore]*Ob + _b[overallscore2]*Ob2 + _b[overallscore3]*Ob3 + _b[overallscore4]*Ob4  + _b[roundnumcat_1]*Rb_1 + _b[roundnumcat_2]*Rb_2  + _b[roundnumcat_3]*Rb_3 + _b[roundnumcat_4]*Rb_4 + _b[roundnumcat_5]*Rb_5 + _b[roundnumcat_6]*Rb_6 + _b[roundnumcat_7]*Rb_7
			
			
			mat B_excess = B_actual - B_est  // mass in excess of counterfactual density
			
			mat D = B_excess'*C'   // 1x1
			
			local excessmass = D[1,1]
			
			* Difference between missing and excess mass: 
			
			local diff = `excessmass' - `missingmass'
			
			di "Difference between excess and missing mass is: `diff' for upper bound `i'"
			
			* Counterfactual densities at the notch and at the upper bound:
			
			*Rounding: 

			gen rnumlb_1=0
			replace rnumlb_1 = 1 if `i'==85
			gen rnumlb_2=0
			replace rnumlb_2 = 1 if `i'==87.4
			gen rnumlb_3=0
			replace rnumlb_3 = 1 if `i'==90
			gen rnumlb_4=0
			replace rnumlb_4 = 1 if `i'==92.4
			gen rnumlb_5=0
			replace rnumlb_5 = 1 if `i'==95
			gen rnumlb_6=0
			replace rnumlb_6 = 1 if `i'==97.4
			gen rnumlb_7=0
			replace rnumlb_7 = 1 if `i'==100
			
			gen rnumthresh_1=0
			replace rnumthresh_1 = 1 if `threshold'==85
			gen rnumthresh_2=0
			replace rnumthresh_2 = 1 if `threshold'==87.4
			gen rnumthresh_3=0
			replace rnumthresh_3 = 1 if `threshold'==90
			gen rnumthresh_4=0
			replace rnumthresh_4 = 1 if `threshold'==92.4
			gen rnumthresh_5=0
			replace rnumthresh_5 = 1 if `threshold'==95
			gen rnumthresh_6=0
			replace rnumthresh_6 = 1 if `threshold'==97.4
			gen rnumthresh_7=0
			replace rnumthresh_7 = 1 if `threshold'==100
			
			gen rnumub_1=0
			replace rnumub_1 = 1 if `ub'==85
			gen rnumub_2=0
			replace rnumub_2 = 1 if `ub'==87.4
			gen rnumub_3=0
			replace rnumub_3 = 1 if `ub'==90
			gen rnumub_4=0
			replace rnumub_4 = 1 if `ub'==92.4
			gen rnumub_5=0
			replace rnumub_5 = 1 if `ub'==95
			gen rnumub_6=0
			replace rnumub_6 = 1 if `ub'==97.4
			gen rnumub_7=0
			replace rnumub_7 = 1 if `ub'==100
			

				
			local cflb = _b[_cons] + `i'*_b[overallscore]+ (`i'^2)*_b[overallscore2] + (`i'^3)* _b[overallscore3] + (`i'^4)* _b[overallscore4] + _b[roundnumcat_1]*rnumlb_1 + _b[roundnumcat_2]*rnumlb_2 + _b[roundnumcat_3]*rnumlb_3 + _b[roundnumcat_4]*rnumlb_4  + _b[roundnumcat_5]*rnumlb_5 + _b[roundnumcat_6]*rnumlb_6 + _b[roundnumcat_7]*rnumlb_7
				
			local cfnotch = _b[_cons] + `threshold'*_b[overallscore]+ (`threshold'^2)*_b[overallscore2] + (`threshold'^3)* _b[overallscore3] + (`threshold'^4)* _b[overallscore4] + _b[roundnumcat_1]*rnumthresh_1 + _b[roundnumcat_2]*rnumthresh_2 + _b[roundnumcat_3]*rnumthresh_3 + _b[roundnumcat_4]*rnumthresh_4  + _b[roundnumcat_5]*rnumthresh_5 + _b[roundnumcat_6]*rnumthresh_6 + _b[roundnumcat_7]*rnumthresh_7
				
			local cfub = _b[_cons] + `ub'*_b[overallscore]+ (`ub'^2)*_b[overallscore2] + (`ub'^3)* _b[overallscore3] + (`ub'^4)* _b[overallscore4] + _b[roundnumcat_1]*rnumub_1 + _b[roundnumcat_2]*rnumub_2 + _b[roundnumcat_3]*rnumub_3 + _b[roundnumcat_4]*rnumub_4  + _b[roundnumcat_5]*rnumub_5 + _b[roundnumcat_6]*rnumub_6 + _b[roundnumcat_7]*rnumub_7


drop rnumlb_* rnumthresh_* 	rnumub_*	
									
			* Estimated average bunching: 
			
			local b_av = `excessmass'/(0.5*(`cflb'+`cfub'))
			
			mat resultsappend = (`ub',`i',`excessmass',`missingmass',`diff',`cflb',`cfub',`b_av')
			mat results = results \ resultsappend
*			matrix list results
			
		}
		
		mat colnames results = ub lb excess missing diff cflb cfub b_av
*		matrix list results
		
		clear 
		svmat results, names(col)
		
		* Keep the bunching estimate for the lowest difference between excess and missing mass: 
		
		drop if lb == 0 
		gen absdiff = abs(diff)
		sort absdiff
		
		keep if _n == 1
		mkmat _all, matrix(R)
				
		
		return scalar lb_est = R[1,2]
		return scalar excess_est = R[1,3]
		return scalar b_av_est = R[1,8]  
end
